
# Ordered list of parameter names published as ``mutable_params``.
# NOTE(review): presumably these are the options a caller is allowed to
# override; the consumer of this list is not visible here -- confirm.
# The line grouping below is only a reading aid; list order is what matters.
mutable_params = [
    "model_name_or_path", "tokenizer_name_or_path",
    "input_dir", "output_dir", "split",
    "max_seq_length",
    "per_device_train_batch_size", "per_device_eval_batch_size",
    "use_flash_attention", "use_fused_rms_norm",
    "fp16", "fp16_opt_level", "scale_loss",
    "learning_rate", "min_learning_rate",
    "max_steps", "save_steps",
    "weight_decay", "warmup_ratio", "max_grad_norm",
    "logging_steps", "dataloader_num_workers", "eval_steps",
    "disable_tqdm", "continue_training", "recompute",
    "do_train", "do_eval",
    "data_impl", "gradient_accumulation_steps",
    "tensor_parallel_degree", "pipeline_parallel_degree", "virtual_pp_degree",
    "sequence_parallel", "distributed_dataloader",
]

# Two further names are appended in place, after the main list is built.
mutable_params.extend(("local_rank", "dist_backend"))